

# Add the 0/1 indicator columns (prefix `hte_`) that the descriptive table
# further down reports under "Sources of Heterogeneity".
estimation_data <- estimation_data |>
  mutate(
    # Two-level campaign attributes: 1 for the "High ..." level, 0 for the
    # "Low ..." level, and NA for every other value (including missing ones).
    hte_campaign_expend = if_else(
      campaign_expend == "High expenditure",
      1,
      if_else(campaign_expend == "Low expenditure", 0, NA_real_)
    ),
    hte_prob_success = if_else(
      campaign_prob_success == "High chance",
      1,
      if_else(campaign_prob_success == "Low chance", 0, NA_real_)
    ),
    # Remaining indicators: 1 when the source column matches the target
    # condition, 0 in every other case. `missing = 0` keeps rows whose source
    # value is missing at 0 rather than NA.
    # NOTE(review): confirm that coding missing responses as 0 is intended.
    hte_javanese = if_else(V_6 == "Jawa", 1, 0, missing = 0),
    hte_islam = if_else(V_5 == "Islam", 1, 0, missing = 0),
    hte_age = if_else(V_2 > 50, 1, 0, missing = 0),
    hte_islamist = if_else(
      islam_party_recode_plot == "Islamist", 1, 0,
      missing = 0
    ),
    hte_pdip = if_else(D == "PDIP", 1, 0, missing = 0),
    hte_gender = if_else(V_1 == "Laki-laki", 1, 0, missing = 0),
    hte_rwa = if_else(rwa_recode_plot == "High RWA", 1, 0, missing = 0),
    hte_neurot = if_else(
      neurot_recode_plot == "High neuroticism", 1, 0,
      missing = 0
    ),
    hte_incumbent = if_else(incumbent_flag == "Incumbent", 1, 0, missing = 0)
  )





# Descriptive statistics for one column.
#
# Returns a one-row tibble with the number of non-missing values (`n_obs`)
# and the mean, standard deviation, minimum, median and maximum computed
# with missing values removed. When the column has no non-missing values,
# every statistic is NA; without this guard min()/max() would return
# Inf/-Inf with a warning and mean() would return NaN.
describe_column <- function(x) {
  n_obs <- sum(!is.na(x))
  if (n_obs == 0L) {
    return(tibble(
      n_obs = n_obs,
      mean = NA_real_,
      std_dev = NA_real_,
      min = NA_real_,
      median = NA_real_,
      max = NA_real_
    ))
  }
  tibble(
    n_obs = n_obs,
    mean = mean(x, na.rm = TRUE),
    std_dev = sd(x, na.rm = TRUE),
    min = min(x, na.rm = TRUE),
    median = median(x, na.rm = TRUE),
    max = max(x, na.rm = TRUE)
  )
}

# Apply describe_column() to every column of `data` and stack the results.
#
# The output has one row per column of `data`: the column name goes into
# `var_name`, followed by the statistics, and `source_label` is stored in a
# constant `source` column that identifies the block of variables.
describe_all_columns <- function(data, source_label) {
  data |>
    map_dfr(describe_column, .id = "var_name") |>
    mutate(source = source_label)
}

# Individual outcome items.
indiv_outcomes <- estimation_data |>
  select(V_28, V_29, V_31_A, V_31_B, V_32_A, V_32_B, V_32_C) |>
  describe_all_columns("Individual Items (Outcomes)")

# Outcome indices.
index_outcomes <- estimation_data |>
  select(avg_score_trust_elect, avg_score_democ_support) |>
  describe_all_columns("Indexed Items (Outcomes)")

# Every column whose name starts with "hte".
hte_vars <- estimation_data |>
  select(starts_with("hte")) |>
  describe_all_columns("Sources of Heterogeneity")

# Covariates: the `cov_` columns are created here only for the summary and
# are not written back to `estimation_data`. Any `cov_` column that already
# exists in `estimation_data` is picked up by starts_with("cov_") as well.
covariates <- estimation_data |>
  mutate(
    # For the case_when() indicators, any non-matching value (including a
    # missing one) falls through to 0.
    cov_outcome = case_when(
      validated_election_outcome == "Elected" ~ 1,
      TRUE ~ 0
    ),
    cov_college_educ = case_when(
      education_recode_plot %in% c("Bachelor's", "Postgraduate") ~ 1,
      TRUE ~ 0
    ),
    cov_income_over_10 = case_when(
      income_recode_plot %in% c("10-20m IDR", ">20m IDR") ~ 1,
      TRUE ~ 0
    ),
    # NOTE(review): if list_position_recode_plot is a factor, as.numeric()
    # returns the level codes rather than the labelled values - confirm.
    cov_list_pos = as.numeric(list_position_recode_plot),
    cov_outsider_present = case_when(
      Y2 == "Ada" ~ 1,
      TRUE ~ 0
    )
  ) |>
  select(starts_with("cov_")) |>
  describe_all_columns("Other Covariates")






# Stack the four per-block summaries in display order and group the result
# by `source`. The grouping stays on the object; it is dropped with
# ungroup() further down, just before the LaTeX table is built.
descriptive_stats_summary <- bind_rows(
  indiv_outcomes,
  index_outcomes,
  hte_vars,
  covariates
) |>
  group_by(source)

# Console check: show up to 50 rows of the combined summary.
print(descriptive_stats_summary, n = 50)
# Replace each raw `var_name` with the label shown in the table.
# The lookup is keyed by the raw column name; a name that has no entry
# below ends up as NA.
measure_labels <- c(
  "V_28" = "Regional media bias",
  "V_29" = "Vote total reflects the will of voters",
  "V_31_A" = "Trust: Electoral commission (KPU)",
  "V_31_B" = "Trust: Election supervisory committee (Bawaslu)",
  "V_32_A" = "Agree: Democracy is the best form for Indonesia",
  "V_32_B" = "Agree: Democracy is the cause of the chaos in our government",
  "V_32_C" = "Agree: Democracy is the reason for our bad economy",
  "avg_score_trust_elect" = "Trust in Elections",
  "avg_score_democ_support" = "Support for Democracy",
  "hte_campaign_expend" = "Campaign Expenditures: High",
  "hte_prob_success" = "Expectations of Success: High",
  "hte_javanese" = "Javanese",
  "hte_islam" = "Muslim",
  "hte_age" = "Age: > 50",
  "hte_islamist" = "Party: Islamist",
  "hte_pdip" = "Party: PDIP",
  "hte_gender" = "Gender: Man",
  "hte_rwa" = "RWA: High",
  "hte_neurot" = "Neuroticism: High",
  "hte_incumbent" = "Incumbent",
  "cov_outcome" = "Election Outcome: Won",
  "cov_college_educ" = "College Educated",
  "cov_income_over_10" = "Income > 10m IDR",
  "cov_list_pos" = "List Position",
  "cov_urban" = "District: Urban",
  "cov_java" = "Location: Java",
  "cov_sumatera" = "Location: Sumatera",
  "cov_kalimantan" = "Location: Kalimantan",
  "cov_outsider_present" = "Outsider Present for Interview"
)

stats_summary_df <- descriptive_stats_summary |>
  mutate(var_name = unname(measure_labels[var_name]))

# Convert the statistic columns to display strings:
#   - missing values stay NA,
#   - whole numbers are printed without decimals,
#   - everything else is rounded to two decimals and printed with two
#     decimal places.
stats_summary_df <- stats_summary_df |>
  mutate(across(
    c(mean, std_dev, min, median, max),
    \(value) {
      is_whole <- value %% 1 == 0
      as_integer_text <- as.character(round(value))
      as_decimal_text <- format(round(value, 2), nsmall = 2, trim = TRUE)
      ifelse(
        is.na(value),
        NA_character_,
        ifelse(is_whole, as_integer_text, as_decimal_text)
      )
    }
  ))


#### make the table with summary stats ####

# LaTeX caption; the embedded \label makes the table referenceable as
# tab:sumstat.
cap <- "\\label{tab:sumstat}Summary statistics."

# Number of rows per `source`, in order of first appearance. Passed to
# pack_rows() as its `index`, so it has to be computed while the `source`
# column is still present.
group_index <- stats_summary_df$source |>
  fct_inorder() |>
  table()

# Table body: drop the grouping and the `source` column, which leaves the
# seven displayed columns.
summary_clean_df <- select(ungroup(stats_summary_df), -source)

# Generate the LaTeX table: one row per measure, with the rows packed under
# a header for each `source` block.
tab <- summary_clean_df %>%
  kable(
    "latex",
    booktabs = TRUE,
    # Cell contents are passed to LaTeX unescaped.
    escape = FALSE,
    longtable = TRUE,
    # Spelled out in full; `cap =` only worked through partial matching of
    # kable()'s `caption` argument.
    caption = cap,
    col.names = linebreak(c(
      "Measure", "N", "Mean", "St. Dev.", "Min.", "Median", "Max."
    )),
    align = c("l", rep("c", 6)),  # 7 columns in total
    linesep = ""
  ) %>%
  kable_styling(
    font_size = 9,
    # NOTE(review): kableExtra appears to skip "scale_down" for longtables
    # (with a warning) - confirm whether the option is still wanted.
    latex_options = c("scale_down", "hold_position", "striped")
  ) %>%
  pack_rows(
    index = group_index,
    bold = TRUE,
    underline = TRUE,
    italic = TRUE
  )


# Write the LaTeX source of the table to disk.
writeLines(tab, "./outputs/tables/table_a2.tex")

